library(SpatialExperiment)
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.3.3
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(spatialLIBD)
library(ggplot2)
spe_weighted <- readRDS("reps_high_range_sigma.sq_high_beta_1000_genes_SVG_percentages_subset/30_percent_nonSVGs/spe_weighted_nnSVG_2.rds")
spe_unweighted <- readRDS("reps_high_range_sigma.sq_high_beta_1000_genes_SVG_percentages_subset/30_percent_nonSVGs/spe_nnSVG_2.rds")
adj_p_values_weighted <- p.adjust(1 - pchisq(rowData(spe_weighted)$weighted_LR_stat, df=2), method = "BH")
rowData(spe_weighted)$weighted_padj <- adj_p_values_weighted
# guiding question: understanding the difference between the weighted and unweighted simulations. why is the weighted simulation more conservative and lower TNR? why does the unweighted simulation capture the SVGs perfectly?
# make a 2x2 table of SVGs with high and low mean and high and low sigma.sq
# find mean of ground_truth_sigma.sq for nonzero values
nonzero_mean <- mean(rowData(spe_unweighted)$ground_truth_sigma.sq[rowData(spe_unweighted)$ground_truth_sigma.sq > 0], na.rm = TRUE)
# find mean of mean
mean_mean <- mean(rowData(spe_unweighted)$mean, na.rm = TRUE)
svg_high_mean_high_sigma_sq <- sum(rowData(spe_unweighted)$ground_truth_sigma.sq > 0 & rowData(spe_unweighted)$ground_truth_sigma.sq > nonzero_mean & rowData(spe_unweighted)$mean > mean_mean, na.rm = TRUE)
svg_high_mean_low_sigma_sq <- sum(rowData(spe_unweighted)$ground_truth_sigma.sq > 0 & rowData(spe_unweighted)$ground_truth_sigma.sq < nonzero_mean & rowData(spe_unweighted)$mean > mean_mean, na.rm = TRUE)
svg_low_mean_high_sigma_sq <- sum(rowData(spe_unweighted)$ground_truth_sigma.sq > 0 & rowData(spe_unweighted)$ground_truth_sigma.sq > nonzero_mean & rowData(spe_unweighted)$mean < mean_mean, na.rm = TRUE)
svg_low_mean_low_sigma_sq <- sum(rowData(spe_unweighted)$ground_truth_sigma.sq > 0 & rowData(spe_unweighted)$ground_truth_sigma.sq < nonzero_mean & rowData(spe_unweighted)$mean < mean_mean, na.rm = TRUE)
svg_table <- matrix(c(svg_high_mean_high_sigma_sq, svg_high_mean_low_sigma_sq, svg_low_mean_high_sigma_sq, svg_low_mean_low_sigma_sq), nrow = 2, byrow = TRUE)
svg_table
## [,1] [,2]
## [1,] 194 144
## [2,] 162 200
# are the weighted and unweighted simulations capturing the same SVGs?
tp_unweighted <- which(rowData(spe_unweighted)$ground_truth_sigma.sq != 0 & rowData(spe_unweighted)$padj <= 0.05)
tp_weighted <- which(rowData(spe_weighted)$ground_truth_sigma.sq != 0 & rowData(spe_weighted)$weighted_padj <= 0.05)
sum(tp_unweighted == tp_weighted)
## [1] 700
length(tp_unweighted)
## [1] 700
length(tp_weighted)
## [1] 700
# yes, they are capturing the all the SVGs and the same SVGs
# are the weighted and unweighted simulations capturing the same non-SVGs?
tn_unweighted <- rowData(spe_unweighted)$ground_truth_sigma.sq == 0 & rowData(spe_unweighted)$padj > 0.05
tn_weighted <- rowData(spe_weighted)$ground_truth_sigma.sq == 0 & rowData(spe_weighted)$weighted_padj > 0.05
sum(tn_unweighted == tn_weighted)
## [1] 961
sum(tn_unweighted)
## [1] 290
sum(tn_weighted)
## [1] 253
#no, the weighted simulation is capturing fewer true negatives
# dig more into the differences in true negatives between the weighted and unweighted simulations
# find the gene indices of the true negatives that are not getting captured by the weighted simulation
tn_unweighted_not_weighted <- which(tn_unweighted & !tn_weighted)
length(tn_unweighted_not_weighted) # = 38
## [1] 38
# find the gene indices of the true negatives that are not getting captured by the unweighted simulation
tn_weighted_not_unweighted <- which(tn_weighted & !tn_unweighted)
length(tn_weighted_not_unweighted) # = 1
## [1] 1
# find the gene indices of the true negatives that are getting captured by both simulations
tn_both <- which(tn_weighted & tn_unweighted)
length(tn_both) # = 252
## [1] 252
# find the dist of the mean of the true negatives that are not getting captured by the weighted simulation
fivenum(rowData(spe_unweighted)$mean[tn_unweighted_not_weighted], na.rm = TRUE)
## [1] 0.5428591 1.0027867 1.9281350 2.4914284 2.9097387
# find the dist of the mean of the true negatives that are not getting captured by the unweighted simulation
fivenum(rowData(spe_unweighted)$mean[tn_weighted_not_unweighted], na.rm = TRUE)
## [1] 2.318769 2.318769 2.318769 2.318769 2.318769
# find the dist of the mean of the true negatives that are getting captured by both simulations
fivenum(rowData(spe_unweighted)$mean[tn_both], na.rm = TRUE)
## [1] 0.4299314 0.7665668 1.2582967 1.9123613 2.9213226
# means are a bit higher for the true negatives that are not getting captured by the weighted simulation
# find the distribution of mean weights for the true negatives that are not getting captured by the weighted simulation
# weights is spots x genes
weights <- readRDS("reps_high_range_sigma.sq_high_beta_1000_genes_SVG_percentages_subset/30_percent_nonSVGs/weights_2.rds")
fivenum(weights[,tn_unweighted_not_weighted])
## [1] 1.624791 2.229350 4.965491 7.205001 34.948928
fivenum(weights[,tn_weighted_not_unweighted])
## [1] 4.621942 5.471900 5.742093 6.191111 34.948928
fivenum(weights[,tn_both])
## [1] 1.624791 2.016317 2.441631 4.477998 34.948928
fivenum(colMeans(weights[,tn_unweighted_not_weighted]))
## [1] 1.885011 2.243264 4.953570 8.231561 16.867777
fivenum(weights[,tn_weighted_not_unweighted]) # there is only 1 gene
## [1] 4.621942 5.471900 5.742093 6.191111 34.948928
fivenum(colMeans(weights[,tn_both]))
## [1] 1.675474 2.000548 2.470684 4.601287 14.296343
# weights are a bit higher for the true negatives that are not getting captured by the weighted simulation
# create some spot plots to visualize the differences
for (ix in tn_weighted_not_unweighted) {
df <- as.data.frame(cbind(spatialCoords(spe_unweighted), expr = logcounts(spe_unweighted)[ix, ]))
print(ggplot(df, aes(x = pxl_col_in_fullres, y = pxl_row_in_fullres,
color = expr)) +
geom_point(size = 2) +
coord_fixed() +
scale_y_reverse() +
scale_color_viridis_c(name = "logcounts") +
ggtitle("") +
theme_bw() +
theme(plot.title = element_text(face = "italic"),
panel.grid = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank())
)
}

# create some spot plots to visualize the differences
for (ix in tn_unweighted_not_weighted) {
df <- as.data.frame(cbind(spatialCoords(spe_unweighted), expr = logcounts(spe_unweighted)[ix, ]))
print(ggplot(df, aes(x = pxl_col_in_fullres, y = pxl_row_in_fullres,
color = expr)) +
geom_point(size = 2) +
coord_fixed() +
scale_y_reverse() +
scale_color_viridis_c(name = "logcounts") +
ggtitle("") +
theme_bw() +
theme(plot.title = element_text(face = "italic"),
panel.grid = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank())
)
}



















